{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 4 Pandas的索引操作"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T03:16:17.847161Z",
     "start_time": "2025-02-26T03:16:17.518850Z"
    }
   },
   "outputs": [],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "source": [
    "\n",
    "dict_data = {'A': 1,\n",
    "             'B': pd.Timestamp('20190926'),\n",
    "             'C': pd.Series(1, index=list(range(4)),dtype='float32'),\n",
    "             'D': np.array([1,2,3,4],dtype='int32'),\n",
    "             'E': [\"Python\",\"Java\",\"C++\",\"C\"],\n",
    "             'F': 'wangdao' }\n",
    "df_obj2 = pd.DataFrame(dict_data)\n",
    "print(df_obj2.index)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:16:53.408142Z",
     "start_time": "2025-02-26T03:16:53.404445Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([0, 1, 2, 3], dtype='int64')\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# 索引对象的值不可变（上面代码增加）\n",
    "# df_obj2.index[0] = 2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T03:17:03.702103Z",
     "start_time": "2025-02-26T03:17:03.694493Z"
    }
   },
   "cell_type": "code",
   "source": "df_obj2",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "   A          B    C  D       E        F\n",
       "0  1 2019-09-26  1.0  1  Python  wangdao\n",
       "1  1 2019-09-26  1.0  2    Java  wangdao\n",
       "2  1 2019-09-26  1.0  3     C++  wangdao\n",
       "3  1 2019-09-26  1.0  4       C  wangdao"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-26</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Python</td>\n",
       "      <td>wangdao</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-26</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>Java</td>\n",
       "      <td>wangdao</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-26</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>C++</td>\n",
       "      <td>wangdao</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-26</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>C</td>\n",
       "      <td>wangdao</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "markdown",
   "source": [
    "3 常见的Index种类\n",
    "•Index，索引  可以是各种类型\n",
    "•Int64Index，整数索引\n",
    "•MultiIndex，层级索引，难度较大\n",
    "•DatetimeIndex，时间戳类型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj = pd.Series(range(5), index = list(\"abcde\"))\n",
    "print(ser_obj)\n",
    "ser_obj.index"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:17:30.743537Z",
     "start_time": "2025-02-26T03:17:30.738920Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['a', 'b', 'c', 'd', 'e'], dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "# 行索引，不仅可以用索引名，可以用索引位置或来取\n",
    "print(ser_obj['b']) #索引名\n",
    "print(ser_obj.iloc[2]) #索引位置"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:19:34.915573Z",
     "start_time": "2025-02-26T03:19:34.913020Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "# 切片索引\n",
    "print(ser_obj[1:3])  #索引位置取数据，左闭右开\n",
    "print(ser_obj['b':'d'])  #记住索引名  左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:20:21.974875Z",
     "start_time": "2025-02-26T03:20:21.971258Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "code",
   "source": [
    "# 不连续索引\n",
    "print(ser_obj.iloc[[0, 2, 4]])\n",
    "print(ser_obj[['a', 'e']])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T03:20:33.090127Z",
     "start_time": "2025-02-26T03:20:33.086929Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "c    2\n",
      "e    4\n",
      "dtype: int64\n",
      "a    0\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "source": [
    "# 布尔索引\n",
    "ser_bool = ser_obj > 2\n",
    "print(ser_obj)\n",
    "print(ser_bool)\n",
    "print('-'*50)\n",
    "print(ser_obj[ser_bool])\n",
    "\n",
    "print(ser_obj[ser_obj > 2]) #取出大于2的元素"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:07:00.543665Z",
     "start_time": "2025-02-26T06:07:00.529319Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "a    False\n",
      "b    False\n",
      "c    False\n",
      "d     True\n",
      "e     True\n",
      "dtype: bool\n",
      "--------------------------------------------------\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4.4 DataFrame索引"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import numpy as np\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = ['a', 'b', 'c', 'd'])\n",
    "print(df_obj.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:07:46.510768Z",
     "start_time": "2025-02-26T06:07:46.505489Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "0 -1.072744  0.909464 -0.304105  0.613053\n",
      "1  0.204402 -1.512798 -0.510262 -0.120146\n",
      "2 -0.860123  0.336391 -0.499397 -0.224446\n",
      "3 -0.825789  0.628703 -0.337707 -1.192812\n",
      "4 -2.072761 -1.774720 -0.101769  0.580371\n"
     ]
    }
   ],
   "execution_count": 13
  },
  {
   "cell_type": "code",
   "source": [
    "# 列索引\n",
    "print(df_obj['a']) # 返回Series类型\n",
    "print('-'*50)\n",
    "print(df_obj[['a']]) # 返回DataFrame类型\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:09:14.611374Z",
     "start_time": "2025-02-26T06:09:14.607255Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -1.072744\n",
      "1    0.204402\n",
      "2   -0.860123\n",
      "3   -0.825789\n",
      "4   -2.072761\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "          a\n",
      "0 -1.072744\n",
      "1  0.204402\n",
      "2 -0.860123\n",
      "3 -0.825789\n",
      "4 -2.072761\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 15
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:09:19.540169Z",
     "start_time": "2025-02-26T06:09:19.537107Z"
    }
   },
   "cell_type": "code",
   "source": "print(type(df_obj[['a']])) # 返回DataFrame类型",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "execution_count": 16
  },
  {
   "cell_type": "markdown",
   "source": [
    "1. loc 标签索引(通过索引标签值获取数据)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 标签索引 loc，建议使用loc，效率更高\n",
    "# Series\n",
    "print(ser_obj)\n",
    "print(ser_obj['b':'d']) #不建议\n",
    "print(ser_obj.loc['b':'d']) #前闭后闭\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:10:24.222589Z",
     "start_time": "2025-02-26T06:10:24.218819Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df_obj = pd.DataFrame(np.random.randn(5,4),\n",
    "                      columns = list('abcd'),\n",
    "                      index=list('abcde'))\n",
    "print(df_obj)\n",
    "print('-'*50)\n",
    "print(df_obj['a'])  #建议不用,拿的是列\n",
    "print('-'*50)\n",
    "print(df_obj.loc['a'])  #拿的是行\n",
    "print('-'*50)\n"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:12:25.550597Z",
     "start_time": "2025-02-26T06:12:25.545520Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -0.999098 -1.831602 -1.495320 -0.134011\n",
      "b  1.100559  0.067801 -0.753872 -2.196088\n",
      "c  1.239953  1.177998  2.279126 -0.778878\n",
      "d  0.396392 -1.005760 -0.530320  0.958080\n",
      "e  0.388883 -0.103025  0.526778 -0.412634\n",
      "--------------------------------------------------\n",
      "a   -0.999098\n",
      "b    1.100559\n",
      "c    1.239953\n",
      "d    0.396392\n",
      "e    0.388883\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n",
      "a   -0.999098\n",
      "b   -1.831602\n",
      "c   -1.495320\n",
      "d   -0.134011\n",
      "Name: a, dtype: float64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:13:34.523838Z",
     "start_time": "2025-02-26T06:13:34.510635Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 第一个参数索引行，第二个参数是列,loc或者iloc效率高于直接用取下标的方式，前闭后闭\n",
    "print(df_obj.loc['a':'c', 'b':'d']) #连续索引\n",
    "print(df_obj.loc[['a','c'], ['b','d']]) #不连续索引\n",
    "print(df_obj.loc[['c'],['b']]) #取一个值,返回的是DataFrame类型\n",
    "print(df_obj.loc['c','b'])  #取一个值"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          b         c         d\n",
      "a -1.831602 -1.495320 -0.134011\n",
      "b  0.067801 -0.753872 -2.196088\n",
      "c  1.177998  2.279126 -0.778878\n",
      "          b         d\n",
      "a -1.831602 -0.134011\n",
      "c  1.177998 -0.778878\n",
      "          b\n",
      "c  1.177998\n",
      "1.1779977057251914\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "markdown",
   "source": [
    "## iloc 位置索引(推荐使用)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "ser_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:14:13.716506Z",
     "start_time": "2025-02-26T06:14:13.711443Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "data": {
      "text/plain": "          a         b         c         d\na  0.722203  0.710413 -0.415365 -0.115376\nb -1.094396  0.902924 -1.357799  0.607512\nc -1.126615 -1.538308 -0.593946 -1.067405\nd -2.039717 -0.093051  1.797557  0.957792\ne  1.311567 -0.856298 -0.460529  0.705077",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>a</th>\n      <th>b</th>\n      <th>c</th>\n      <th>d</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>a</th>\n      <td>0.722203</td>\n      <td>0.710413</td>\n      <td>-0.415365</td>\n      <td>-0.115376</td>\n    </tr>\n    <tr>\n      <th>b</th>\n      <td>-1.094396</td>\n      <td>0.902924</td>\n      <td>-1.357799</td>\n      <td>0.607512</td>\n    </tr>\n    <tr>\n      <th>c</th>\n      <td>-1.126615</td>\n      <td>-1.538308</td>\n      <td>-0.593946</td>\n      <td>-1.067405</td>\n    </tr>\n    <tr>\n      <th>d</th>\n      <td>-2.039717</td>\n      <td>-0.093051</td>\n      <td>1.797557</td>\n      <td>0.957792</td>\n    </tr>\n    <tr>\n      <th>e</th>\n      <td>1.311567</td>\n      <td>-0.856298</td>\n      <td>-0.460529</td>\n      <td>0.705077</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:08:14.103017400Z",
     "start_time": "2024-04-11T03:08:14.027298500Z"
    }
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-02-26T06:14:24.537151Z",
     "start_time": "2025-02-26T06:14:24.533556Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Series\n",
    "print(ser_obj[1:3])\n",
    "print(ser_obj.iloc[1:3]) # 前闭后开[)，效率高\n",
    "print('-'*50)"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "b    1\n",
      "c    2\n",
      "dtype: int64\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame，iloc是前闭后开[)\n",
    "print(df_obj)\n",
    "print(df_obj.iloc[0:2, 0:2]) # 注意和df_obj.loc[0:2, 'a']的区别\n",
    "print(df_obj.iloc[[0,2], [0,2]]) # 不连续索引\n",
    "print(df_obj.iloc[0,0]) # 取一个值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-26T06:14:32.959835Z",
     "start_time": "2025-02-26T06:14:32.954275Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          a         b         c         d\n",
      "a -0.999098 -1.831602 -1.495320 -0.134011\n",
      "b  1.100559  0.067801 -0.753872 -2.196088\n",
      "c  1.239953  1.177998  2.279126 -0.778878\n",
      "d  0.396392 -1.005760 -0.530320  0.958080\n",
      "e  0.388883 -0.103025  0.526778 -0.412634\n",
      "          a         b\n",
      "a -0.999098 -1.831602\n",
      "b  1.100559  0.067801\n",
      "          a         c\n",
      "a -0.999098 -1.495320\n",
      "c  1.239953  2.279126\n",
      "-0.9990977981235025\n"
     ]
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
    "#没有设置行和列索引的DataFrame，iloc和loc的区别\n",
    "df_obj2 = pd.DataFrame(np.random.randn(5,4))\n",
    "print(df_obj2)\n",
    "print('-'*50)\n",
    "print(df_obj2.iloc[0:2]) #左闭右开\n",
    "print('-'*50)\n",
    "print(df_obj2.loc[0:2]) #左闭右闭"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-02-26T06:15:22.631606Z",
     "start_time": "2025-02-26T06:15:22.626665Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -0.121568 -0.131784  0.504275  0.314764\n",
      "1  3.037141  0.790205 -0.088647 -2.677519\n",
      "2 -0.298099  1.137497  0.866289  0.823224\n",
      "3 -0.328559  0.773870 -0.721010  0.964544\n",
      "4 -0.388196  1.634064  1.467206 -0.339671\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0 -0.121568 -0.131784  0.504275  0.314764\n",
      "1  3.037141  0.790205 -0.088647 -2.677519\n",
      "--------------------------------------------------\n",
      "          0         1         2         3\n",
      "0 -0.121568 -0.131784  0.504275  0.314764\n",
      "1  3.037141  0.790205 -0.088647 -2.677519\n",
      "2 -0.298099  1.137497  0.866289  0.823224\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "markdown",
   "source": "# 5.对齐运算(不重要，放到最后讲解）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s1+s2: \n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5     NaN\n",
      "6     NaN\n",
      "7     NaN\n",
      "8     NaN\n",
      "9     NaN\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "s1 = pd.Series(range(10, 20), index = range(10))\n",
    "s2 = pd.Series(range(20, 25), index = range(5))\n",
    "# Series 对齐运算\n",
    "print('s1+s2: ')\n",
    "s3=s1+s2\n",
    "print(s3)  #缺失数据默认是NaN  np.nan"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:13:46.273122200Z",
     "start_time": "2024-04-11T03:13:46.229148400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1,)\n",
      "[2 3 4 5 6]\n"
     ]
    }
   ],
   "source": [
    "#两个长度不同的一维ndarray相加\n",
    "a1 = np.array([1,2,3,4,5])\n",
    "a2 = np.array([1]) # 长度为1\n",
    "print(a2.shape)\n",
    "print(a1+a2)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:17:17.264730200Z",
     "start_time": "2024-04-11T03:17:17.253735200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "--------------------------------------------------\n",
      "0    30.0\n",
      "1    32.0\n",
      "2    34.0\n",
      "3    36.0\n",
      "4    38.0\n",
      "5    15.0\n",
      "6    16.0\n",
      "7    17.0\n",
      "8    18.0\n",
      "9    19.0\n",
      "dtype: float64\n",
      "0    10.0\n",
      "1    10.0\n",
      "2    10.0\n",
      "3    10.0\n",
      "4    10.0\n",
      "5   -15.0\n",
      "6   -16.0\n",
      "7   -17.0\n",
      "8   -18.0\n",
      "9   -19.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(np.isnan(s3[6]))\n",
    "print('-'*50)\n",
    "print(s2.add(s1, fill_value = 0))  #未对齐的数据将和填充值做运算\n",
    "print(s2.sub(s1, fill_value = 0))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:18:59.732425700Z",
     "start_time": "2024-04-11T03:18:59.697446300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b\n",
      "0  1.0  1.0\n",
      "1  1.0  1.0\n",
      "     a    b    c\n",
      "0  1.0  1.0  1.0\n",
      "1  1.0  1.0  1.0\n",
      "2  1.0  1.0  1.0\n",
      "--------------------------------------------------\n",
      "a    float64\n",
      "b    float64\n",
      "c    float64\n",
      "dtype: object\n",
      "     a    b   c\n",
      "0  0.0  0.0 NaN\n",
      "1  0.0  0.0 NaN\n",
      "2  NaN  NaN NaN\n",
      "     a    b    c\n",
      "0  0.0  0.0 -1.0\n",
      "1  0.0  0.0 -1.0\n",
      "2 -1.0 -1.0 -1.0\n"
     ]
    }
   ],
   "source": [
    "#df的对齐运算\n",
    "import numpy as np\n",
    "df1 = pd.DataFrame(np.ones((2,2)), columns = ['a', 'b'])\n",
    "df2 = pd.DataFrame(np.ones((3,3)), columns = ['a', 'b', 'c'])\n",
    "print(df1)\n",
    "print(df2)\n",
    "print('-'*50)\n",
    "print(df2.dtypes)\n",
    "print(df1-df2)\n",
    "print(df2.sub(df1, fill_value = 2)) #未对齐的数据将和填充值做运算"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:19:41.996698500Z",
     "start_time": "2024-04-11T03:19:41.963716500Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
